{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "8b2Ydy6r5Bq0",
        "outputId": "e55fe8ae-3711-4162-f8df-323adcfeba70"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\u001b[?25l     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/807.5 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K     \u001b[91m━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m153.6/807.5 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m807.5/807.5 kB\u001b[0m \u001b[31m12.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m42.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m173.7/173.7 kB\u001b[0m \u001b[31m15.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.0/13.0 MB\u001b[0m \u001b[31m76.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m252.6/252.6 kB\u001b[0m \u001b[31m21.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.6/66.6 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m227.4/227.4 kB\u001b[0m \u001b[31m23.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m74.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m345.4/345.4 kB\u001b[0m \u001b[31m18.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m138.5/138.5 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.8/77.8 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "bigframes 0.22.0 requires pandas<2.1.4,>=1.5.0, but you have pandas 2.2.1 which is incompatible.\n",
            "google-colab 1.0.0 requires pandas==1.5.3, but you have pandas 2.2.1 which is incompatible.\u001b[0m\u001b[31m\n",
            "\u001b[0m"
          ]
        }
      ],
      "source": [
        "!pip install -qU langchain langchain-openai langchain-community langchain-experimental pandas"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import getpass\n",
        "import os\n",
        "\n",
        "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "74jPm9Fm5I4p",
        "outputId": "08db5eef-8571-4a05-b224-fedda3f39c2e"
      },
      "execution_count": 2,
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "··········\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!wget https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv -O titanic.csv"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "9fxV7zV85MFI",
        "outputId": "80afc8b7-a24d-49b2-8598-9db175372681"
      },
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "--2024-03-06 08:35:23--  https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv\n",
            "Resolving web.stanford.edu (web.stanford.edu)... 171.67.215.200, 2607:f6d0:0:925a::ab43:d7c8\n",
            "Connecting to web.stanford.edu (web.stanford.edu)|171.67.215.200|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 44225 (43K) [text/csv]\n",
            "Saving to: ‘titanic.csv’\n",
            "\n",
            "titanic.csv         100%[===================>]  43.19K  --.-KB/s    in 0.1s    \n",
            "\n",
            "2024-03-06 08:35:23 (310 KB/s) - ‘titanic.csv’ saved [44225/44225]\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "\n",
        "df = pd.read_csv(\"titanic.csv\")\n",
        "print(df.shape)\n",
        "print(df.columns.tolist())"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Kzb8dcZD5xQi",
        "outputId": "16ec6780-c714-4749-dbe0-6ce146f4c2ec"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(887, 8)\n",
            "['Survived', 'Pclass', 'Name', 'Sex', 'Age', 'Siblings/Spouses Aboard', 'Parents/Children Aboard', 'Fare']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from langchain_community.utilities import SQLDatabase\n",
        "from sqlalchemy import create_engine\n",
        "\n",
        "engine = create_engine(\"sqlite:///titanic.db\")\n",
        "df.to_sql(\"titanic\", engine, index=False)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "fNpl92Wl6Huq",
        "outputId": "1cffd0ef-d8b0-4b7d-cca7-89ca214c43a2"
      },
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "887"
            ]
          },
          "metadata": {},
          "execution_count": 5
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "db = SQLDatabase(engine=engine)\n",
        "print(db.dialect)\n",
        "print(db.get_usable_table_names())\n",
        "db.run(\"SELECT * FROM titanic WHERE Age < 2;\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 146
        },
        "id": "zSHQ0CH163XE",
        "outputId": "0aacc655-76f6-4a27-c9da-d9b28349884b"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "sqlite\n",
            "['titanic']\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "\"[(1, 2, 'Master. Alden Gates Caldwell', 'male', 0.83, 0, 2, 29.0), (0, 3, 'Master. Eino Viljami Panula', 'male', 1.0, 4, 1, 39.6875), (1, 3, 'Miss. Eleanor Ileen Johnson', 'female', 1.0, 1, 1, 11.1333), (1, 2, 'Master. Richard F Becker', 'male', 1.0, 2, 1, 39.0), (1, 1, 'Master. Hudson Trevor Allison', 'male', 0.92, 1, 2, 151.55), (1, 3, 'Miss. Maria Nakid', 'female', 1.0, 0, 2, 15.7417), (0, 3, 'Master. Sidney Leonard Goodwin', 'male', 1.0, 5, 2, 46.9), (1, 3, 'Miss. Helene Barbara Baclini', 'female', 0.75, 2, 1, 19.2583), (1, 3, 'Miss. Eugenie Baclini', 'female', 0.75, 2, 1, 19.2583), (1, 2, 'Master. Viljo Hamalainen', 'male', 0.67, 1, 1, 14.5), (1, 3, 'Master. Bertram Vere Dean', 'male', 1.0, 1, 2, 20.575), (1, 3, 'Master. Assad Alexander Thomas', 'male', 0.42, 0, 1, 8.5167), (1, 2, 'Master. Andre Mallet', 'male', 1.0, 0, 2, 37.0042), (1, 2, 'Master. George Sibley Richards', 'male', 0.83, 1, 1, 18.75)]\""
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 6
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from langchain_community.agent_toolkits import create_sql_agent\n",
        "from langchain_openai import ChatOpenAI\n",
        "\n",
        "llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
        "agent_executor = create_sql_agent(llm, db=db, agent_type=\"openai-tools\", verbose=True)"
      ],
      "metadata": {
        "id": "N5E8ZaEi79kA"
      },
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "agent_executor.invoke({\"input\": \"what's the average age of survivors\"})"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "xu-0Brbc8BxZ",
        "outputId": "7cf1d583-721d-4423-c029-ea42c46d33ff"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "\n",
            "\u001b[1m> Entering new SQL Agent Executor chain...\u001b[0m\n",
            "\u001b[32;1m\u001b[1;3m\n",
            "Invoking: `sql_db_list_tables` with ``\n",
            "\n",
            "\n",
            "\u001b[0m\u001b[38;5;200m\u001b[1;3mtitanic\u001b[0m\u001b[32;1m\u001b[1;3m\n",
            "Invoking: `sql_db_schema` with `{'table_names': 'titanic'}`\n",
            "\n",
            "\n",
            "\u001b[0m\u001b[33;1m\u001b[1;3m\n",
            "CREATE TABLE titanic (\n",
            "\t\"Survived\" BIGINT, \n",
            "\t\"Pclass\" BIGINT, \n",
            "\t\"Name\" TEXT, \n",
            "\t\"Sex\" TEXT, \n",
            "\t\"Age\" FLOAT, \n",
            "\t\"Siblings/Spouses Aboard\" BIGINT, \n",
            "\t\"Parents/Children Aboard\" BIGINT, \n",
            "\t\"Fare\" FLOAT\n",
            ")\n",
            "\n",
            "/*\n",
            "3 rows from titanic table:\n",
            "Survived\tPclass\tName\tSex\tAge\tSiblings/Spouses Aboard\tParents/Children Aboard\tFare\n",
            "0\t3\tMr. Owen Harris Braund\tmale\t22.0\t1\t0\t7.25\n",
            "1\t1\tMrs. John Bradley (Florence Briggs Thayer) Cumings\tfemale\t38.0\t1\t0\t71.2833\n",
            "1\t3\tMiss. Laina Heikkinen\tfemale\t26.0\t0\t0\t7.925\n",
            "*/\u001b[0m\u001b[32;1m\u001b[1;3m\n",
            "Invoking: `sql_db_query` with `SELECT AVG(Age) AS Average_Age FROM titanic WHERE Survived = 1`\n",
            "\n",
            "\n",
            "\u001b[0m\u001b[36;1m\u001b[1;3m[(28.408391812865496,)]\u001b[0m\u001b[32;1m\u001b[1;3mThe average age of survivors in the Titanic dataset is approximately 28.41 years.\u001b[0m\n",
            "\n",
            "\u001b[1m> Finished chain.\u001b[0m\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'input': \"what's the average age of survivors\",\n",
              " 'output': 'The average age of survivors in the Titanic dataset is approximately 28.41 years.'}"
            ]
          },
          "metadata": {},
          "execution_count": 8
        }
      ]
    }
  ]
}